
##################################
## AIC for quasi=poisson

qaic <- function(mod){
  phi <- summary(mod)$dispersion
  #df <- mod$df.residual
  k <- mod$df.null-mod$df.residual
  ll <- logLik(update(mod,family="poisson"))[1]
  #out <- (-2)*(ll)/phi + 2*df
  out <- 2*k - 2*(ll)/phi
  return(out)
}


##################################
r3 <- function(x){round(x,3)}
r1 <- function(x){round(x,1)}

###################################
# Residual autocorrelation 

moranResid <- function(map,mod){
idz <- data.frame(ID=row.names(map));rez <- data.frame(ID=row.names(mod$model),RESID=mod$residuals); rez <- merge(idz,rez,by="ID",all.x=T,all.y=T); rez <- rez[order(as.numeric(as.character(rez$ID))),];rez$RESID[is.na(rez$RESID)] <- 0
return(moran.test(rez$RESID,W,randomisation=T))
}

moranResid_vil <- function(map,mod){
  idz <- data.frame(ID=row.names(sp.villages));rez <- data.frame(ID=row.names(mod$model),RESID=mod$residuals); rez <- merge(idz,rez,by="ID",all.x=T,all.y=T); rez <- rez[order(as.numeric(as.character(rez$ID))),];rez$RESID[is.na(rez$RESID)] <- 0;
  return(moran.test(rez$RESID,Wpt,randomisation=F))
}



###################################

# This function performs a 2SLS regression calculating the usual regression output, a weak identification F-statistic, the Wu-Hausman test of endogeneity, and, in the case where there is more than one-instrument, a Sargan test. 
# The weak identification statistic is used to determine whether the instrument(s) is(are) sufficiently correlated with the endogenous variable of interest. The ‘rule-of-thumb’ critical statistic here is ten. 
# A Wu-Hausman test examines the difference between the IV and OLS coefficients. Rejecting the null hypothesis indicates the presence of endogeneity. 
# Finally, the Sargan over-identification test is used in the cases where there are more instruments than endogenous regressors. A rejection of the null in this test means that the instruments are not exclusively affecting the outcome of interest though the endogenous variable.


ivreg2 <- function(form,endog,iv,data,digits=3){
  # library(MASS)
  # model setup
  r1 <- lm(form,data)
  y <- r1$fitted.values+r1$resid
  x <- model.matrix(r1)
  aa <- rbind(endog == colnames(x),1:dim(x)[2]) 
  z <- cbind(x[,aa[2,aa[1,]==0]],data[,iv]) 
  colnames(z)[(dim(z)[2]-length(iv)+1):(dim(z)[2])] <- iv 
  # iv coefficients and standard errors
  z <- as.matrix(z)
  pz <- z %*% (solve(crossprod(z))) %*% t(z)
  biv <- solve(crossprod(x,pz) %*% x) %*% (crossprod(x,pz) %*% y)
  sigiv <- crossprod((y - x %*% biv),(y - x %*% biv))/(length(y)-length(biv))
  vbiv <- as.numeric(sigiv)*solve(crossprod(x,pz) %*% x)
  res <- cbind(biv,sqrt(diag(vbiv)),biv/sqrt(diag(vbiv)),(1-pnorm(biv/sqrt(diag(vbiv))))*2)
  res <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res)),nrow=dim(res)[1])
  rownames(res) <- colnames(x)
  colnames(res) <- c("Coef","S.E.","t-stat","p-val")
  # First-stage F-test
  y1 <- data[,endog]
  z1 <- x[,aa[2,aa[1,]==0]]
  bet1 <- solve(crossprod(z)) %*% crossprod(z,y1)
  bet2 <- solve(crossprod(z1)) %*% crossprod(z1,y1)
  rss1 <- sum((y1 - z %*% bet1)^2)
  rss2 <- sum((y1 - z1 %*% bet2)^2)
  p1 <- length(bet1)
  p2 <- length(bet2)
  n1 <- length(y)
  fs <- abs((rss2-rss1)/(p2-p1))/(rss1/(n1-p1))
  firststage <- c(fs)
  firststage <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),firststage)),ncol=length(firststage))
  colnames(firststage) <- c("First Stage F-test")
  # Hausman tests
  bols <- solve(crossprod(x)) %*% crossprod(x,y)
  sigols <- crossprod((y - x %*% bols),(y - x %*% bols))/(length(y)-length(bols))
  vbols <- as.numeric(sigols)*solve(crossprod(x))
  sigml <- crossprod((y - x %*% bols),(y - x %*% bols))/(length(y))
  x1 <- x[,!(colnames(x) %in% "(Intercept)")]
  z1 <- z[,!(colnames(z) %in% "(Intercept)")]
  pz1 <- z1 %*% (solve(crossprod(z1))) %*% t(z1)
  biv1 <- biv[!(rownames(biv) %in% "(Intercept)"),]
  bols1 <- bols[!(rownames(bols) %in% "(Intercept)"),]
  # Durbin-Wu-Hausman chi-sq test:
  # haus <- t(biv1-bols1) %*% ginv(as.numeric(sigml)*(solve(crossprod(x1,pz1) %*% x1)-solve(crossprod(x1)))) %*% (biv1-bols1)
  # hpvl <- 1-pchisq(haus,df=1)
  # Wu-Hausman F test
  resids <- NULL
  resids <- cbind(resids,y1 - z %*% solve(crossprod(z)) %*% crossprod(z,y1))
  x2 <- cbind(x,resids)
  bet1 <- solve(crossprod(x2)) %*% crossprod(x2,y)
  bet2 <- solve(crossprod(x)) %*% crossprod(x,y)
  rss1 <- sum((y - x2 %*% bet1)^2)
  rss2 <- sum((y - x %*% bet2)^2)
  p1 <- length(bet1)
  p2 <- length(bet2)
  n1 <- length(y)
  fs <- abs((rss2-rss1)/(p2-p1))/(rss1/(n1-p1))
  fpval <- 1-pf(fs, p1-p2, n1-p1)
  #hawu <- c(haus,hpvl,fs,fpval)
  hawu <- c(fs,fpval)
  hawu <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),hawu)),ncol=length(hawu))
  #colnames(hawu) <- c("Durbin-Wu-Hausman chi-sq test","p-val","Wu-Hausman F-test","p-val")
  colnames(hawu) <- c("Wu-Hausman F-test","p-val") 
  # Sargan Over-id test
  ivres <- y - (x %*% biv)
  oid <- solve(crossprod(z)) %*% crossprod(z,ivres)
  sstot <- sum((ivres-mean(ivres))^2)
  sserr <- sum((ivres - (z %*% oid))^2)
  rsq <- 1-(sserr/sstot)
  sargan <- length(ivres)*rsq
  spval <- 1-pchisq(sargan,df=length(iv)-1)
  overid <- c(sargan,spval)
  overid <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),overid)),ncol=length(overid))
  colnames(overid) <- c("Sargan test of over-identifying restrictions","p-val")
  if(length(iv)-1==0){
    overid <- t(matrix(c("No test performed. Model is just identified")))
    colnames(overid) <- c("Sargan test of over-identifying restrictions")
  }
  full <- list(results=res, weakidtest=firststage, endogeneity=hawu, overid=overid)
  return(full)
}


# mod <- ivreg2(N_VILLAGES~HOUSES_PRE+CONVOYS_VIL+COERCION+CONTROL_AREA+ETH_RUS+NEWTER+DIST_RAIL+OPEN_TERRAIN+NUM_SELSOVIETS+URBAN100,endog="CONVOYS_VIL",iv=c("CENT_BTWN_VIL"),data=as.data.frame(map));mod$results
# mod$weakidtest
# mod$endogeneity
# mod$overid



# form <- as.formula(HOUSES_DESTROYED~HOUSES_PRE+CONVOYS_VIL+CONTROL_AREA+ETH_RUS+NEWTER+DIST_RAIL+OPEN_TERRAIN+NUM_SELSOVIETS+URBAN100)
# data <- as.data.frame(map)
# endog <- "CONVOYS_VIL"
# iv <- "CENT_BTWN_VIL"


#########################################################


quantile_normalisation <- function(df){
  df_rank <- apply(df,2,rank,ties.method="first")
  df_sorted <- data.frame(apply(df, 2, sort))
  df_mean <- apply(df_sorted, 1, mean)
  index_to_mean <- function(my_index, my_mean){return(my_mean[my_index])}  
  df_final <- apply(df_rank, 2, index_to_mean, my_mean=df_mean)
  rownames(df_final) <- rownames(df)
  return(df_final)
}

#########################################################



require(mvtnorm)
predmanQF <- function(mod,var,vals,quadratic=F,fax=NULL){
  nsim<-10000
  # vars <- unlist(strsplit(as.character(mod$formula)[3],split=" + ",fixed=T))
  vars <- names(coefficients(mod))
  n <- length(vals)
  X <- as.data.frame(matrix(NA,nrow=n,ncol=length(vars)))
  names(X) <- names(coefficients(mod))
  vars. <- vars[!(grepl("as.factor",vars,fixed=T)|grepl("Intercept",vars,fixed=T))]
  X[,vars.] <- as.data.frame(matrix(apply(mod$model[,vars.],2,function(x){median(x,na.rm=T)}),nrow=n,ncol=length(vars.),byrow=T))
  X[,"(Intercept)"] <- 1
  X[,var] <- vals
  if(quadratic==T){
    quadz <- names(mod$model)[grep("I(",names(mod$model),fixed=T)]
    for(k in 1:length(quadz)){
      quadd <- names(mod$model)[apply(as.data.frame(names(mod$model)),1,function(x){grepl(x,quadz[k])})]
      X[,quadz[k]] <- X[,quadd]^2}}
  if(length(fax)>0){
    for(k in 1:length(fax)){
      X[,vars==paste("as.factor(",names(fax[k]),")",fax[[k]],sep="")]<-0
      X[,which(grepl("as.factor",vars)&grepl(names(fax[k]),vars)&(!grepl(fax[[k]],vars)))] <- 0
      X[,which(grepl("as.factor",vars)&grepl(names(fax[k]),vars)&grepl(fax[[k]],vars))] <- 1
    }}
  betas <- rmvnorm(n=nsim,mean=coefficients(mod),sigma=vcov(mod))
  mu <- betas%*%t(X)
  pred.y <- mod$family$linkinv(mu)
  y <- apply(pred.y,2,median)
  y.lo <- apply(pred.y,2,function(x){quantile(x,.025)})
  y.hi <- apply(pred.y,2,function(x){quantile(x,.975)})
  rrs <- 100*(pred.y[,ncol(pred.y)]/pred.y[,1]-1)
  rr <- mean(rrs)
  rr.lo <- quantile(rrs,.025)
  rr.up <- quantile(rrs,.975)
  return(list(y=y,lo=y.lo,up=y.hi, rr=rr,rr.lo=rr.lo,rr.up=rr.up))}

#########################################################

# var <- "CONVOYS_IV"
# xlab <- "X"
# ylab <- "Y"
# vals <- xs
# mod
# var="CONVOYS_VIL_IV"
# xlab="Convoys derailed by partisans"
# ylab="Houses destroyed by Germans"
# cax=.9
# head(mod$model)
# vars.%in%names(mod$model)

require(mvtnorm)
predmanME <- function(mod,var,vals,quadratic=F,fax=NULL){
  nsim<-10000
  # vars <- unlist(strsplit(as.character(mod$formula)[3],split=" + ",fixed=T))
	# vars <- names(mod$model)
  vars <- names(coefficients(mod))
  vars <- gsub(")vec",").vec",vars)
  n <- length(vals)
  X <- as.data.frame(matrix(NA,nrow=n,ncol=length(vars)))
  names(X) <- names(coefficients(mod))
  # names(X) <- vars
  vars. <- vars[!(grepl("as.factor",vars,fixed=T)|(grepl("fitted",vars,fixed=T))|grepl("Intercept",vars,fixed=T))]
  X[,vars.] <- as.data.frame(matrix(apply(mod$model[,vars.],2,function(x){median(x,na.rm=T)}),nrow=n,ncol=length(vars.),byrow=T))
  X[,"(Intercept)"] <- 1
  X[,var] <- vals
  if(quadratic==T){
    quadz <- names(mod$model)[grep("I(",names(mod$model),fixed=T)]
    for(k in 1:length(quadz)){
      quadd <- names(mod$model)[apply(as.data.frame(names(mod$model)),1,function(x){grepl(x,quadz[k])})]
      X[,quadz[k]] <- X[,quadd]^2}}
  if(length(fax)>0){
    for(k in 1:length(fax)){
X[,vars==paste("as.factor(",names(fax[k]),")",fax[[k]],sep="")]<-0
X[,which(grepl("as.factor",vars)&grepl(names(fax[k]),vars)&(!grepl(fax[[k]],vars)))] <- 0
X[,which(grepl("as.factor",vars)&grepl(names(fax[k]),vars)&grepl(fax[[k]],vars))] <- 1
    }}
    for(k in 1:length(grep("fitted",names(X)))){
    X[,grep("fitted",names(X))[k]]<-apply(mod$model[,grep("fitted",names(mod$model))],2,median)[k]}
  betas <- rmvnorm(n=nsim,mean=coefficients(mod),sigma=vcov(mod))
  mu <- betas%*%t(X)
  pred.y <- mod$family$linkinv(mu)
  y <- apply(pred.y,2,median)
  y.lo <- apply(pred.y,2,function(x){quantile(x,.025)})
  y.hi <- apply(pred.y,2,function(x){quantile(x,.975)})
  rrs <- 100*(pred.y[,ncol(pred.y)]/pred.y[,1]-1)
  rr <- mean(rrs)
  rr.lo <- quantile(rrs,.025)
  rr.up <- quantile(rrs,.975)
  return(list(y=y,lo=y.lo,up=y.hi, rr=rr,rr.lo=rr.lo,rr.up=rr.up))}




#########################################################

predmanGAM <- function(mod,var,vals,quadratic=F,fax=NULL){
  nsim<-10000
  vars <- names(mod$model)
  n <- length(vals)
  X <- as.data.frame(matrix(NA,nrow=n,ncol=length(vars)))
  names(X) <- vars
  vars. <- vars[!(grepl("as.factor",vars,fixed=T)|grepl("Intercept",vars,fixed=T))]
  X[,vars.] <- as.data.frame(matrix(apply(mod$model[,vars.],2,function(x){mean(x,na.rm=T)}),nrow=n,ncol=length(vars.),byrow=T))
  # X[,"(Intercept)"] <- 1
  X[,var] <- vals
  for(j in 1:length(vars.)){X[,vars.[j]] <- as.numeric(as.character(X[,vars.[j]]))}
  if(quadratic==T){
    quadz <- names(mod$model)[grep("I(",names(mod$model),fixed=T)]
    for(k in 1:length(quadz)){
      quadd <- names(mod$model)[apply(as.data.frame(names(mod$model)),1,function(x){grepl(x,quadz[k])})]
      X[,quadz[k]] <- X[,quadd]^2}}
  if(length(fax)>0){
    for(k in 1:length(fax)){
X[,which(grepl(names(fax[k]),vars))] <- as.factor(fax[[k]])
    }}
    X <- as.data.frame(X)
    names(X) <- gsub(")","",gsub("as.factor(","",names(X),fixed=T))
    preds <- predict.gam(mod,newdata=X,type="link",se.fit=T)
	preds <- cbind(preds$fit,preds$fit-1.96*preds$se.fit,preds$fit+1.96*preds$se.fit)
    preds <- mod$family$linkinv(preds)    
  y <- preds[,1]
  y.lo <- preds[,2]
  y.hi <- preds[,3]
  rr <- 100*(y[length(y)]/y[1]-1)
  rr.lo <- 100*(y.lo[length(y.lo)]/y.lo[1]-1)
  rr.up <- 100*(y.hi[length(y.hi)]/y.hi[1]-1)
  return(list(y=y,lo=y.lo,up=y.hi, rr=rr,rr.lo=rr.lo,rr.up=rr.up))}


#####################################################

# var <- "CONVOYS_VIL_IV"
# xlab <- "X"
# ylab <- "Y"
# mod
# var="CONVOYS_IV"
# xlab="Convoys derailed by partisans"
# ylab="Houses destroyed by Germans"
# cax=.9

contplot <- function(mod,var,xlab="X",ylab="Y",main="",cax=1,fax=NULL){
xs <- seq(min(mod$model[,var],na.rm=T),max(mod$model[,var],na.rm=T),length.out=100)
preds <- predmanQF(mod,var,vals=xs,fax=fax)
pred.mean <- preds$y
pred.lo <- preds$lo
pred.hi <- preds$up
par(mar=c(4,4,.5,.5))
plot(x=xs,y=pred.mean,type="l",ylim=c(min(pred.lo),max(pred.hi)),main=main,ylab=ylab,xlab=xlab,cex.axis=.9,cex.lab=cax,cex.main=1.2,bty="l")
lines(x=xs,y=pred.lo,lty=2)
lines(x=xs,y=pred.hi,lty=2)
rug(jitter(mod$model[,var]))
}



contplot <- function(mod,var,xlab="X",ylab="Y",main="",cax=1,fax=NULL){
xs <- seq(min(mod$model[,var],na.rm=T),max(mod$model[,var],na.rm=T),length.out=100)
preds <- predmanQF(mod,var,vals=xs,fax=fax)
pred.mean <- preds$y
pred.lo <- preds$lo
pred.hi <- preds$up
par(mar=c(4,4,.5,.5))
plot(x=xs,y=pred.mean,type="l",ylim=c(min(pred.lo),max(pred.hi)),main=main,ylab=ylab,xlab=xlab,cex.axis=.9,cex.lab=cax,cex.main=1.2,bty="l")
# lines(x=xs,y=pred.lo,lty=2)
# lines(x=xs,y=pred.hi,lty=2)
polygon(x=c(xs,rev(xs)),y=c(pred.lo,rev(pred.hi)),col="gray",border="NA")
lines(x=xs,y=pred.mean)
rug(jitter(mod$model[,var]))
}



contplot_GAM <- function(mod,var,xlab="X",ylab="Y",main="",cax=1,fax=NULL,col.ci="gray"){
xs <- seq(min(mod$model[,var],na.rm=T),max(mod$model[,var],na.rm=T),length.out=100)
preds <- predmanGAM(mod,var,vals=xs,fax=fax)
pred.mean <- preds$y
pred.lo <- preds$lo
pred.hi <- preds$up
par(mar=c(4,4,.5,.5))
plot(x=xs,y=pred.mean,type="l",ylim=c(min(pred.lo),max(pred.hi)),main=main,ylab=ylab,xlab=xlab,cex.axis=.9,cex.lab=cax,cex.main=1.2,bty="l")
# lines(x=xs,y=pred.lo,lty=2)
# lines(x=xs,y=pred.hi,lty=2)
polygon(x=c(xs,rev(xs)),y=c(pred.lo,rev(pred.hi)),col=col.ci,border="NA")
lines(x=xs,y=pred.mean)
rug(jitter(mod$model[,var]))
}

contplot_ME <- function(mod,var,xlab="X",ylab="Y",main="",cax=1,fax=NULL){
xs <- seq(min(mod$model[,var],na.rm=T),max(mod$model[,var],na.rm=T),length.out=100)
preds <- predmanME(mod,var,vals=xs,fax=fax)
pred.mean <- preds$y
pred.lo <- preds$lo
pred.hi <- preds$up
par(mar=c(4,4,.5,.5))
plot(x=xs,y=pred.mean,type="l",ylim=c(min(pred.lo),max(pred.hi)),main=main,ylab=ylab,xlab=xlab,cex.axis=.9,cex.lab=cax,cex.main=1.2,bty="l")
# lines(x=xs,y=pred.lo,lty=2)
# lines(x=xs,y=pred.hi,lty=2)
polygon(x=c(xs,rev(xs)),y=c(pred.lo,rev(pred.hi)),col="gray",border="NA")
lines(x=xs,y=pred.mean)
rug(jitter(mod$model[,var]))
}




#################################################


binplot <- function(mod,var,labname,main,cax=1){
X <- apply(prematch[,unlist(strsplit(gsub(")","",gsub("s(","",mod$formula,fixed=T))[3],split=" + ",fixed=T))],2,function(x){mean(x,na.rm=T)})
X <- as.data.frame(matrix(X,nrow=2,ncol=length(X),byrow=T))
names(X) <- unlist(strsplit(gsub(")","",gsub("s(","",mod$formula,fixed=T))[3],split=" + ",fixed=T))
X[,var] <- 0:1
if("DIST_TO_RAIL"%in%names(X)){X$RAIL2 <- X$DIST_TO_RAIL*X$DIST_TO_RAIL}

preds <- predict(mod,newdata=X,type="link",se.fit=T)
pred.mean <- preds$fit
pred.lo <- preds$fit-1.96*preds$se.fit
pred.hi <- preds$fit+1.96*preds$se.fit
pred.mean <- 1/(1+exp(-pred.mean))
pred.lo <- 1/(1+exp(-pred.lo))
pred.hi <- 1/(1+exp(-pred.hi))

par(mar=c(4,4,2,.5))
plot(x=X[,var],y=pred.mean,type="p",ylim=c(min(pred.lo),max(pred.hi)),main=main,ylab="Prob. of Resettlement",xlab=labname,cex.axis=.9,cex.lab=cax,cex.main=1.2,xlim=c(-2.5,3.5),xaxt="n",pch=1,col=NA)
axis(1,at=0:1)

segments(x0=seq(X[1,var]-.3,X[1,var]+.3,length.out=100),x1=seq(X[1,var]-.3,X[1,var]+.3,length.out=100),y0=pred.lo[1],y1=pred.hi[1],lty=1,col="grey")
segments(x0=seq(X[2,var]-.3,X[2,var]+.3,length.out=100),x1=seq(X[2,var]-.3,X[2,var]+.3,length.out=100),y0=pred.lo[2],y1=pred.hi[2],lty=1,col="grey")

segments(x0=X[1,var]-.3,x1=X[1,var]+.3,y0=pred.mean[1],y1=pred.mean[1],lty=1)
segments(x0=X[2,var]-.3,x1=X[2,var]+.3,y0=pred.mean[2],y1=pred.mean[2],lty=1)
segments(x0=X[1,var]-.3,x1=X[1,var]+.3,y0=pred.lo[1],y1=pred.lo[1],lty=2)
segments(x0=X[2,var]-.3,x1=X[2,var]+.3,y0=pred.lo[2],y1=pred.lo[2],lty=2)
segments(x0=X[1,var]-.3,x1=X[1,var]+.3,y0=pred.hi[1],y1=pred.hi[1],lty=2)
segments(x0=X[2,var]-.3,x1=X[2,var]+.3,y0=pred.hi[2],y1=pred.hi[2],lty=2)
CurlyBraces(1.5, (pred.mean[2]-pred.mean[1])/2+pred.mean[1], pred.mean[2]-pred.mean[1], pos = 1, direction = 1 )
#text(2,(pred.mean[2]-pred.mean[1])/2+pred.mean[1],paste(ifelse(pred.mean[2]-pred.mean[1]>0,"+ ","- "),round(pred.mean[2]-pred.mean[1],2),"\n","(",round(min(c(pred.lo[2]-pred.lo[1],pred.hi[2]-pred.hi[1])),2),",",round(max(c(pred.lo[2]-pred.lo[1],pred.hi[2]-pred.hi[1])),2),")",sep=""),pos=4,cex=.8)
text(2,(pred.mean[2]-pred.mean[1])/2+pred.mean[1],paste(if(pred.mean[2]-pred.mean[1]>0){"+ "},round(pred.mean[2]-pred.mean[1],2),sep=""),pos=4,cex=.8)
}






########################################
## Google geocode
########################################


#install.packages("RCurl")
library(RCurl)
#install.packages("RJSONIO")
library(RJSONIO)
#install.packages("plyr")
library(plyr)

## Google geocode

url <- function(address, return.call = "json", sensor = "false") {
  root <- "http://maps.google.com/maps/api/geocode/"
  u <- paste(root, return.call, "?address=", address, "&sensor=", sensor, sep = "")
  return(URLencode(u))
}


geoCode <- function(address,verbose=FALSE) {
  if(verbose) cat(address,"\n")
  u <- url(address)
  doc <- getURL(u)
  x <- fromJSON(doc,simplify = FALSE)
  if(x$status=="OK") {
    lat <- x$results[[1]]$geometry$location$lat
    lng <- x$results[[1]]$geometry$location$lng
    location_type  <- x$results[[1]]$geometry$location_type
    formatted_address  <- x$results[[1]]$formatted_address
    return(c(lat, lng, location_type, formatted_address))
    Sys.sleep(0.5)
  } else {
    return(c(NA,NA,NA, NA))
  }
}


########################################
## Yandex geocode
########################################

# query <- tweets$LOCATION[i]
# query <- "32.752625,-97.138904"
# query <- "Hong Kong"
require(stringr)

url2 <- function(address, return.call = "json", sensor = "false") {
  root <- "http://geocode-maps.yandex.ru/1.x/?format=json&lang=en-BR&geocode="
  u <- paste(root, address, sep = "")
  return(URLencode(u))
}


geoCode2 <- function(query,match.num=1){
# target=paste0("http://geocode-maps.yandex.ru/1.x/?format=json&lang=en-BR&geocode=",query)
  # rd <- readLines(target, warn="F",encoding="UTF-8") 
  # dat <- fromJSON(rd)
  
  u <- url2(query)
  doc <- getURL(u)
  dat <- fromJSON(doc,simplify = FALSE)

#Exctract address and location data
if(length(dat$response$GeoObjectCollection$featureMember)>0){
address <- ifelse(length(dat$response$GeoObjectCollection$featureMember[[match.num]]$GeoObject$metaDataProperty$GeocoderMetaData$AddressDetails$Country)>2,dat$response$GeoObjectCollection$featureMember[[match.num]]$GeoObject$metaDataProperty$GeocoderMetaData$AddressDetails$Country$AddressLine,dat$response$GeoObjectCollection$featureMember[[match.num]]$GeoObject$metaDataProperty$GeocoderMetaData$AddressDetails$Country[2])
if(address=="NULL"){address<-NA}
pos <- dat$response$GeoObjectCollection$featureMember[[match.num]]$GeoObject$Point
# temp <- unlist(str_split(pos," "))
temp <- unlist(str_split(unlist(pos)," "))
latitude=as.numeric(temp)[1]
longitude=as.numeric(temp)[2]}

if(length(dat$response$GeoObjectCollection$featureMember)==0){address<- NA;longitude=NA;latitude=NA}

return(c(latitude,longitude,address))

}




################################################################
## Summary stats
################################################################

# data <- data
# varz <- c("Pub","HARDNEWS_WS")


sumstat <- function(data,varz,labs=NULL){
  sum.mat <- data.frame(Obs=apply(data[,varz],2,function(x){sum(!is.na(x))}),
                        Mean=apply(data[,varz],2,function(x){mean(x,na.rm=T)}),
                        Median=apply(data[,varz],2,function(x){median(x,na.rm=T)}),
                        SD=apply(data[,varz],2,function(x){sd(x,na.rm=T)}),
                        Min=apply(data[,varz],2,function(x){min(x,na.rm=T)}),
                        Max=apply(data[,varz],2,function(x){max(x,na.rm=T)}))
  if(length(labs)==0){labs <- varz}
  row.names(sum.mat) <- labs
  return(sum.mat)
}




#################################################
## Nils' functions
#################################################




streg.sort <- function(data, unitvar, timevar) {
  data[order(data[,unitvar], data[,timevar]),]
}

streg.tlag <- function(data, timevar, lagvar, order=1) {
  result <- numeric(nrow(data))
  timevalues <- data[,timevar]
  varvalues <- data[,lagvar]
  times <- unique(timevalues)
  
  # set initial lag values to NA
  for (time in 1:order) {
    result[timevalues==times[time]] <- NA
  }
  
  # compute lag
  for (time in (order+1):length(times)) {
    result[timevalues==times[time]] <- varvalues[timevalues==times[time-order]]
  }
  result
}


streg.ntlag <- function(data, timevar, lagvar, order=1) {
  result <- numeric(nrow(data))
  timevalues <- data[,timevar]
  varvalues <- data[,lagvar]
  times <- unique(timevalues)
  
  # set initial lag values to NA
  for (time in length(times):(length(times)-order)) {
    result[timevalues==times[time]] <- NA
  }
  
  # compute lag
  for (time in 1:(length(times)-order)) {
    result[timevalues==times[time]] <- varvalues[timevalues==times[time+order]]
  }
  result
}


streg.slag <- function(data, timevar, lagvar, Wmat) {
  result <- numeric(nrow(data))
  timevalues <- data[,timevar]
  varvalues <- data[,lagvar]
  times <- unique(timevalues)
  for (time in 1:length(times)) {
    result[timevalues==times[time]] <- Wmat %*% varvalues[timevalues==times[time]]
  }
  result
}

